python train.py \
    --do_train True \
    --model_name_or_path /data/models/Qwen/Qwen3-8B \
    --preprocessing_num_workers 2 \
    --finetuning_type lora \
    --template qwen3 \
    --dataset_dir data \
    --dataset alpaca_zh_demo \
    --cutoff_len 2048 \
    --learning_rate 5e-05 \
    --num_train_epochs 3.0 \
    --max_samples 100000 \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --max_grad_norm 1.0 \
    --logging_steps 5 \
    --save_steps 100 \
    --warmup_steps 0 \
    --packing False \
    --enable_thinking True \
    --report_to none \
    --output_dir saves/Qwen3-8B-Instruct/lora/train_2025-07-13-20-56-41 \
    --bf16 True \
    --plot_loss True \
    --trust_remote_code True \
    --ddp_timeout 180000000 \
    --include_num_input_tokens_seen True \
    --optim adamw_torch \
    --lora_rank 8 \
    --lora_alpha 16 \
    --lora_dropout 0 \
    --lora_target all

Note: the output path originally used Windows-style backslashes (`saves\Qwen3-8B-Instruct\...`); in a POSIX shell an unquoted backslash is consumed as an escape character and the directory name would be mangled, so the separators are written as forward slashes here.
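Once training completes, the LoRA adapter weights land in `--output_dir`. Below is a minimal inference sketch for loading that adapter on top of the base model using `transformers` and `peft`; the two libraries, the generation settings, and the sample prompt are assumptions for illustration, while the paths mirror the command above (adjust them to your environment).

```python
# Minimal sketch: load the base Qwen3-8B model and attach the trained LoRA adapter.
# Assumes `transformers` and `peft` are installed; paths mirror the training command.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_path = "/data/models/Qwen/Qwen3-8B"
adapter_path = "saves/Qwen3-8B-Instruct/lora/train_2025-07-13-20-56-41"

tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    base_path,
    torch_dtype=torch.bfloat16,  # matches the --bf16 True training setting
    device_map="auto",
    trust_remote_code=True,
)
# Attach the trained LoRA weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()

# A Chinese prompt, since the adapter was tuned on alpaca_zh_demo.
messages = [{"role": "user", "content": "你好，请介绍一下你自己。"}]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    enable_thinking=True,  # Qwen3 template option, mirroring --enable_thinking True
    return_tensors="pt",
).to(model.device)

with torch.no_grad():
    outputs = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```

If you prefer a standalone model without the PEFT dependency at serving time, `model.merge_and_unload()` folds the adapter into the base weights before saving.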